***
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Biblioteki wykorzystane do realizacji projektu:
library(survival)
library(survminer)
library(survMisc)
library(dplyr)
library(gtools)
library(ggplot2)
library(corrplot)
library(gridExtra)
library(extrafont)
library(VIM)
library(extrafont)
library(mfp)
library(splines)
library(CoxR2)
library(ipred)
library(Rcpp)
library(RColorBrewer)
library(knitr)
library(kableExtra)
library(MASS)
data(pbc)
dane <- pbc
dane$cens <- ifelse(dane$status=="0" | dane$status=="1",0,1)
dane <- dane[1:312,]
head(dane,10)
## id time status trt age sex ascites hepato spiders edema bili chol
## 1 1 400 2 1 58.76523 f 1 1 1 1.0 14.5 261
## 2 2 4500 0 1 56.44627 f 0 1 1 0.0 1.1 302
## 3 3 1012 2 1 70.07255 m 0 0 0 0.5 1.4 176
## 4 4 1925 2 1 54.74059 f 0 1 1 0.5 1.8 244
## 5 5 1504 1 2 38.10541 f 0 1 1 0.0 3.4 279
## 6 6 2503 2 2 66.25873 f 0 1 0 0.0 0.8 248
## 7 7 1832 0 2 55.53457 f 0 1 0 0.0 1.0 322
## 8 8 2466 2 2 53.05681 f 0 0 0 0.0 0.3 280
## 9 9 2400 2 1 42.50787 f 0 0 1 0.0 3.2 562
## 10 10 51 2 2 70.55989 f 1 0 1 1.0 12.6 200
## albumin copper alk.phos ast trig platelet protime stage cens
## 1 2.60 156 1718.0 137.95 172 190 12.2 4 1
## 2 4.14 54 7394.8 113.52 88 221 10.6 3 0
## 3 3.48 210 516.0 96.10 55 151 12.0 4 1
## 4 2.54 64 6121.8 60.63 92 183 10.3 4 1
## 5 3.53 143 671.0 113.15 72 136 10.9 3 0
## 6 3.98 50 944.0 93.00 63 NA 11.0 3 1
## 7 4.09 52 824.0 60.45 213 204 9.7 3 0
## 8 4.00 52 4651.2 28.38 189 373 11.0 3 1
## 9 3.08 79 2276.0 144.15 88 251 11.0 2 1
## 10 2.74 140 918.0 147.25 143 302 11.5 4 1
tail(dane,10)
## id time status trt age sex ascites hepato spiders edema bili chol
## 303 303 1250 0 2 60.65982 f 0 1 1 0 1.0 372
## 304 304 1230 0 1 35.53457 f 0 0 0 0 0.5 219
## 305 305 1216 0 2 43.06639 f 0 1 1 0 2.9 426
## 306 306 1216 0 2 56.39151 f 0 1 0 0 0.6 239
## 307 307 1149 0 2 30.57358 f 0 0 0 0 0.8 273
## 308 308 1153 0 1 61.18275 f 0 1 0 0 0.4 246
## 309 309 994 0 2 58.29979 f 0 0 0 0 0.4 260
## 310 310 939 0 1 62.33265 f 0 0 0 0 1.7 434
## 311 311 839 0 1 37.99863 f 0 0 0 0 2.0 247
## 312 312 788 0 2 33.15264 f 0 0 1 0 6.4 576
## albumin copper alk.phos ast trig platelet protime stage cens
## 303 3.25 108 1190 140 55 248 10.6 4 0
## 304 3.93 22 663 45 75 246 10.8 3 0
## 305 3.61 73 5184 288 144 275 10.6 3 0
## 306 3.45 31 1072 55 64 227 10.7 2 0
## 307 3.56 52 1282 130 59 344 10.5 2 0
## 308 3.58 24 797 91 113 288 10.4 2 0
## 309 2.75 41 1166 70 82 231 10.8 2 0
## 310 3.35 39 1713 171 100 234 10.2 2 0
## 311 3.16 69 1050 117 88 335 10.5 2 0
## 312 3.79 186 2115 136 149 200 10.8 2 0
NA_count <- colSums(is.na(dane))
NA_count
## id time status trt age sex ascites hepato
## 0 0 0 0 0 0 0 0
## spiders edema bili chol albumin copper alk.phos ast
## 0 0 0 28 0 2 0 0
## trig platelet protime stage cens
## 30 4 0 0 0
barplot(NA_count)
___
daneKNN <- kNN(dane, variable = c("chol", "copper", "trig", "platelet"), k = 5)
daneKNN <- daneKNN[,1:21,drop = F]
daneKNN$age <- round(daneKNN$age,0)
levels(daneKNN$sex) <- c(1,0)
daneKNN$status <- as.factor(daneKNN$status)
daneKNN$trt <- as.factor(daneKNN$trt)
daneKNN$ascites <- as.factor(daneKNN$ascites)
daneKNN$spiders <- as.factor(daneKNN$spiders)
daneKNN$hepato <- as.factor(daneKNN$hepato)
daneKNN$stage <- as.factor(daneKNN$stage)
daneKNN$edema <- as.factor(daneKNN$edema)
daneKNN$cens <- as.factor(daneKNN$cens)
str(daneKNN)
## 'data.frame': 312 obs. of 21 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ time : int 400 4500 1012 1925 1504 2503 1832 2466 2400 51 ...
## $ status : Factor w/ 3 levels "0","1","2": 3 1 3 3 2 3 1 3 3 3 ...
## $ trt : Factor w/ 2 levels "1","2": 1 1 1 1 2 2 2 2 1 2 ...
## $ age : num 59 56 70 55 38 66 56 53 43 71 ...
## $ sex : Factor w/ 2 levels "1","0": 2 2 1 2 2 2 2 2 2 2 ...
## $ ascites : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 2 ...
## $ hepato : Factor w/ 2 levels "0","1": 2 2 1 2 2 2 2 1 1 1 ...
## $ spiders : Factor w/ 2 levels "0","1": 2 2 1 2 2 1 1 1 2 2 ...
## $ edema : Factor w/ 3 levels "0","0.5","1": 3 1 2 2 1 1 1 1 1 3 ...
## $ bili : num 14.5 1.1 1.4 1.8 3.4 0.8 1 0.3 3.2 12.6 ...
## $ chol : int 261 302 176 244 279 248 322 280 562 200 ...
## $ albumin : num 2.6 4.14 3.48 2.54 3.53 3.98 4.09 4 3.08 2.74 ...
## $ copper : int 156 54 210 64 143 50 52 52 79 140 ...
## $ alk.phos: num 1718 7395 516 6122 671 ...
## $ ast : num 137.9 113.5 96.1 60.6 113.2 ...
## $ trig : int 172 88 55 92 72 63 213 189 88 143 ...
## $ platelet: int 190 221 151 183 136 233 204 373 251 302 ...
## $ protime : num 12.2 10.6 12 10.3 10.9 11 9.7 11 11 11.5 ...
## $ stage : Factor w/ 4 levels "1","2","3","4": 4 3 4 4 3 3 3 3 2 4 ...
## $ cens : Factor w/ 2 levels "0","1": 2 1 2 2 1 2 1 2 2 2 ...
summary(daneKNN)
## id time status trt age sex
## Min. : 1.00 Min. : 41 0:168 1:158 Min. :26.00 1: 36
## 1st Qu.: 78.75 1st Qu.:1191 1: 19 2:154 1st Qu.:42.00 0:276
## Median :156.50 Median :1840 2:125 Median :50.00
## Mean :156.50 Mean :2006 Mean :50.03
## 3rd Qu.:234.25 3rd Qu.:2697 3rd Qu.:57.00
## Max. :312.00 Max. :4556 Max. :78.00
## ascites hepato spiders edema bili chol
## 0:288 0:152 0:222 0 :263 Min. : 0.300 Min. : 120.0
## 1: 24 1:160 1: 90 0.5: 29 1st Qu.: 0.800 1st Qu.: 252.0
## 1 : 20 Median : 1.350 Median : 309.0
## Mean : 3.256 Mean : 364.8
## 3rd Qu.: 3.425 3rd Qu.: 394.2
## Max. :28.000 Max. :1775.0
## albumin copper alk.phos ast
## Min. :1.96 Min. : 4.00 Min. : 289.0 Min. : 26.35
## 1st Qu.:3.31 1st Qu.: 41.75 1st Qu.: 871.5 1st Qu.: 80.60
## Median :3.55 Median : 73.00 Median : 1259.0 Median :114.70
## Mean :3.52 Mean : 97.73 Mean : 1982.7 Mean :122.56
## 3rd Qu.:3.80 3rd Qu.:123.25 3rd Qu.: 1980.0 3rd Qu.:151.90
## Max. :4.64 Max. :588.00 Max. :13862.4 Max. :457.25
## trig platelet protime stage cens
## Min. : 33.00 Min. : 62.0 Min. : 9.00 1: 16 0:187
## 1st Qu.: 84.75 1st Qu.:200.0 1st Qu.:10.00 2: 67 1:125
## Median :107.50 Median :256.0 Median :10.60 3:120
## Mean :122.34 Mean :261.9 Mean :10.73 4:109
## 3rd Qu.:146.00 3rd Qu.:322.5 3rd Qu.:11.10
## Max. :598.00 Max. :563.0 Max. :17.10
a <- ggplot(data = daneKNN, aes(x = age)) + geom_histogram(alpha = 0.7, binwidth = 5, fill="darkorange") + theme_minimal() +
ggtitle('Age') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "N")
b <- ggplot(data = daneKNN, aes(x = bili)) + geom_histogram(alpha = 0.7, binwidth = 2, fill="mediumpurple3") + theme_minimal() +
ggtitle('Bilirunbin') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "N")
c <- ggplot(data = daneKNN, aes(x = chol)) + geom_histogram(alpha = 0.7, binwidth = 50, fill="seagreen4") + theme_minimal() +
ggtitle('Cholesterol') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "N")
d <- ggplot(data = daneKNN, aes(x = albumin)) + geom_histogram(alpha = 0.7, binwidth = 0.2, fill="dodgerblue3") + theme_minimal() +
ggtitle('Albumin') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "N")
a1 <- ggplot(data = daneKNN, aes(x = copper)) + geom_histogram(alpha = 0.7, binwidth = 30, fill="indianred4") + theme_minimal() +
ggtitle('Copper') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "N")
b1 <- ggplot(data = daneKNN, aes(x = alk.phos)) + geom_histogram(alpha = 0.7, binwidth = 400, fill="royalblue3") + theme_minimal() +
ggtitle('Alkaline phos.') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
c1 <- ggplot(data = daneKNN, aes(x = ast)) + geom_histogram(alpha = 0.7, binwidth = 20, fill="goldenrod2") + theme_minimal() +
ggtitle('Aspartate amino.') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
d1 <- ggplot(data = daneKNN, aes(x = trig)) + geom_histogram(alpha = 0.7, binwidth = 20, fill="cadetblue3") + theme_minimal() +
ggtitle('Triglycerides') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
a2 <- ggplot(data = daneKNN, aes(x = platelet)) + geom_histogram(alpha = 0.7, binwidth = 25, fill="orangered3") + theme_minimal() +
ggtitle('Platelet') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
b2 <- ggplot(data = daneKNN, aes(x = protime)) + geom_histogram(alpha = 0.7, binwidth = 0.5, fill="darkolivegreen4") + theme_minimal() +
ggtitle('Protime') +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
c2 <- ggplot(data = daneKNN, aes(x = time, fill = cens)) + geom_histogram(alpha = 0.7, binwidth = 200) + facet_grid(~cens) + theme_minimal() +
ggtitle('Time for cens & event') +
scale_fill_manual(values = c("coral3","slateblue3")) +
theme(axis.title.y = element_text(color="Grey23", size=11),
axis.text.y = element_text(size=9),
axis.title.x = element_blank(),
plot.title = element_text(color="gray26", size=20, family="serif")) +
labs(y = "N")
grid.arrange(a,b,c,d)
grid.arrange(a1,b1,c1,d1)
grid.arrange(a2,b2)
c2
e <- daneKNN %>%
count(sex) %>%
ggplot() + geom_col(aes(x = sex, y = n, fill = sex), alpha = 0.7)+ geom_label(aes(x = sex, y = n, label = n)) + scale_fill_manual(values = c("navajowhite3","plum4")) +
theme_minimal() +
ggtitle('Number of patients by gender') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
f <- daneKNN %>%
count(trt) %>%
ggplot() + geom_col(aes(x = trt, y = n, fill = trt), alpha = 0.7)+ geom_label(aes(x = trt, y = n, label = n)) + scale_fill_manual(values = c("lightseagreen","cornsilk3")) +
theme_minimal() +
ggtitle('Number of patients by treatment') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
g <- daneKNN %>%
count(ascites) %>%
ggplot() + geom_col(aes(x = ascites, y = n, fill = ascites), alpha = 0.7)+ geom_label(aes(x = ascites, y = n, label = n)) + scale_fill_manual(values = c("darkseagreen4","coral3")) +
theme_minimal() +
ggtitle('Number of patients by ascites presence') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
h <- daneKNN %>%
count(hepato) %>%
ggplot() + geom_col(aes(x = hepato, y = n, fill = hepato), alpha = 0.7)+ geom_label(aes(x = hepato, y = n, label = n)) + scale_fill_manual(values = c("palegreen4", "indianred3")) +
theme_minimal() +
ggtitle('Number of patients by hepato presence') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
e1 <- daneKNN %>%
count(edema) %>%
ggplot() + geom_col(aes(x = edema, y = n, fill = edema), alpha = 0.7)+ geom_label(aes(x = edema, y = n, label = n)) + scale_fill_manual(values = c("springgreen3","tan3","tomato3")) +
theme_minimal() +
ggtitle('Number of patients by edema presence') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
f1 <- daneKNN %>%
count(stage) %>%
ggplot() + geom_col(aes(x = stage, y = n, fill = stage), alpha = 0.7)+ geom_label(aes(x = stage, y = n, label = n)) + scale_fill_manual(values = c("palegreen3", "skyblue3","salmon3","brown3")) +
theme_minimal() +
ggtitle('Number of patients by stage of disease') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
g1 <- daneKNN %>%
count(spiders) %>%
ggplot() + geom_col(aes(x = spiders, y = n, fill = spiders), alpha = 0.7)+ geom_label(aes(x = spiders, y = n, label = n)) + scale_fill_manual(values = c("forestgreen", "chocolate3")) +
theme_minimal() +
ggtitle('Number of patients spiders presence') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
h1 <- daneKNN %>%
count(cens) %>%
ggplot() + geom_col(aes(x = cens, y = n, fill = cens), alpha = 0.7)+ geom_label(aes(x = cens, y = n, label = n)) + scale_fill_manual(values = c("coral3","slateblue3")) +
theme_minimal() +
ggtitle('Number of patients by cens or event') +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=19, family="serif"))
grid.arrange(e,f)
grid.arrange(g,h)
grid.arrange(e1,f1)
grid.arrange(g1,h1)
z <- ggplot(daneKNN, aes(x=stage, y = age, fill=stage)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("palegreen3", "skyblue3","salmon3","brown3")) + theme_minimal() +
ggtitle("Age by stage") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Age")
x <- ggplot(daneKNN, aes(x=stage, y = bili, fill=stage)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("palegreen3", "skyblue3","salmon3","brown3")) + theme_minimal() +
ggtitle("Bilirunbin by stage") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Bilirunbin")
z1 <- ggplot(daneKNN, aes(x=stage, y = time, fill=stage)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("palegreen3", "skyblue3","salmon3","brown3")) + theme_minimal() +
ggtitle("Time by stage") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Time") + facet_grid(~cens)
x1 <- ggplot(daneKNN, aes(x=ascites, y = bili, fill=ascites)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("darkseagreen4","coral3")) + theme_minimal() +
ggtitle("Bilirunbin by ascites presence") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Bilirunbin")
x2 <- ggplot(daneKNN, aes(x=hepato, y = bili, fill=hepato)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("palegreen4", "indianred3")) + theme_minimal() +
ggtitle("Bilirunbin by hepato presence") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Bilirunbin")
z2 <- ggplot(daneKNN, aes(x=hepato, y = copper, fill=hepato)) + geom_boxplot(size=1.2, alpha=0.5) +
scale_fill_manual(values = c("palegreen4", "indianred3")) + theme_minimal() +
ggtitle("Copper by hepato presence") +
theme(axis.title.y = element_text(color="Grey23", size=12),
axis.title.x=element_blank(),
legend.title = element_text(size=12),
legend.text = element_text(size=8),
legend.position = "right",
legend.justification = c(0.94,0.94),
legend.background = element_rect(fill="grey88",
size=0.5, linetype="solid",
colour ="gray26"),
plot.title = element_text(color="gray26", size=18, family="serif")) +
labs(y = "Copper")
grid.arrange(z,x)
grid.arrange(z1,x1)
grid.arrange(z2,x2)
tab1 <- daneKNN %>%
group_by(sex) %>%
summarise(N = n(), Mean = round(mean(age),2), Min = min(age), Max = max(age), Sd = round(sd(age),2))
tab1 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| sex | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 1 | 36 | 56.19 | 33 | 78 | 11.54 |
| 0 | 276 | 49.22 | 26 | 77 | 10.20 |
tab2 <- daneKNN %>%
group_by(edema) %>%
summarise(N = n(), Mean = round(mean(bili),2), Min = min(bili), Max = max(bili), Sd = round(sd(bili),2))
tab2 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| edema | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 0 | 263 | 2.52 | 0.3 | 25.5 | 3.35 |
| 0.5 | 29 | 5.76 | 0.6 | 28.0 | 7.24 |
| 1 | 20 | 9.26 | 0.8 | 22.5 | 7.00 |
tab3 <- daneKNN %>%
group_by(stage) %>%
summarise(N = n(), Mean = round(mean(bili),2), Min = min(bili), Max = max(bili), Sd = round(sd(bili),2))
tab3 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| stage | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 1 | 16 | 1.07 | 0.5 | 6.0 | 1.33 |
| 2 | 67 | 2.13 | 0.3 | 25.5 | 3.82 |
| 3 | 120 | 2.90 | 0.3 | 28.0 | 4.34 |
| 4 | 109 | 4.66 | 0.6 | 24.5 | 5.05 |
tab4 <- daneKNN %>%
group_by(sex) %>%
summarise(N = n(), Mean = round(mean(chol),2), Min = min(chol), Max = max(chol), Sd = round(sd(chol),2))
tab4 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| sex | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 1 | 36 | 357.61 | 151 | 1000 | 178.80 |
| 0 | 276 | 365.79 | 120 | 1775 | 228.08 |
Tu będzie tekst
tab5 <- daneKNN %>%
group_by(sex) %>%
summarise(N = n(), Mean = round(mean(trig),2), Min = min(trig), Max = max(trig), Sd = round(sd(trig),2))
tab5 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| sex | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 1 | 36 | 132.56 | 49 | 242 | 51.68 |
| 0 | 276 | 121.00 | 33 | 598 | 64.12 |
tab6 <- daneKNN %>%
group_by(stage) %>%
summarise(N = n(), Mean = round(mean(trig),2), Min = min(trig), Max = max(trig), Sd = round(sd(trig),2))
tab6 %>%
kbl() %>%
kable_styling(full_width = T, html_font = "Cambria", fixed_thead = T, font_size = 16)
| stage | N | Mean | Min | Max | Sd |
|---|---|---|---|---|---|
| 1 | 16 | 90.75 | 55 | 188 | 32.22 |
| 2 | 67 | 112.63 | 44 | 319 | 53.19 |
| 3 | 120 | 128.68 | 46 | 382 | 56.91 |
| 4 | 109 | 125.96 | 33 | 598 | 75.28 |
doKor <- daneKNN[,c(5, 12,13,14,15,16,17,18,19)]
corMat <- cor(doKor)
#corrplot(corMat,method="number", tl.col = 'black')
corrplot(corMat, method = 'color',addCoef.col = 'black', order = 'AOE', tl.col = 'black', col=brewer.pal(n=10, name="PuOr"))
daneKNN$cens <- as.character(daneKNN$cens)
daneKNN$cens <- as.numeric(daneKNN$cens)
Surv(daneKNN$time, daneKNN$cens)
## [1] 400 4500+ 1012 1925 1504+ 2503 1832+ 2466 2400 51 3762 304
## [13] 3577+ 1217 3584 3672+ 769 131 4232+ 1356 3445+ 673 264 4079
## [25] 4127+ 1444 77 549 4509+ 321 3839 4523+ 3170 3933+ 2847 3611+
## [37] 223 3244 2297 4467+ 1350 4453+ 4556+ 3428 4025+ 2256 2576+ 4427+
## [49] 708 2598 3853 2386 1000 1434 1360 1847 3282 4459+ 2224 4365+
## [61] 4256+ 3090 859 1487 3992+ 4191 2769 4039+ 1170 3458+ 4196+ 4184+
## [73] 4190+ 1827 1191 71 326 1690 3707+ 890 2540 3574 4050+ 4032+
## [85] 3358 1657 198 2452+ 1741 2689 460 388 3913+ 750 130 3850+
## [97] 611 3823+ 3820+ 552 3581+ 3099+ 110 3086 3092+ 3222 3388+ 2583
## [109] 2504+ 2105 2350+ 3445 980 3395 3422+ 3336+ 1083 2288 515 2033+
## [121] 191 3297+ 971 3069+ 2468+ 824 3255+ 1037 3239+ 1413 850 2944+
## [133] 2796 3149+ 3150+ 3098+ 2990+ 1297 2106+ 3059+ 3050+ 2419 786 943
## [145] 2976+ 2615+ 2995+ 1427 762 2891+ 2870+ 1152 2863+ 140 2666+ 853
## [157] 2835+ 2475+ 1536 2772+ 2797+ 186 2055 264 1077 2721+ 1682 2713+
## [169] 1212 2692+ 2574+ 2301+ 2657+ 2644+ 2624+ 1492 2609+ 2580+ 2573+ 2563+
## [181] 2556+ 2555+ 2241+ 974 2527+ 1576 733 2332+ 2456+ 2504+ 216 2443+
## [193] 797 2449+ 2330+ 2363+ 2365+ 2357+ 1592+ 2318+ 2294+ 2272+ 2221+ 2090
## [205] 2081 2255+ 2171+ 904 2216+ 2224+ 2195+ 2176+ 2178+ 1786 1080 2168+
## [217] 790 2170+ 2157+ 1235 2050+ 597 334 1945+ 2022+ 1978+ 999 1967+
## [229] 348 1979+ 1165 1951+ 1932+ 1776+ 1882+ 1908+ 1882+ 1874+ 694 1831+
## [241] 837+ 1810+ 930 1690 1790+ 1435+ 732+ 1785+ 1783+ 1769+ 1457+ 1770+
## [253] 1765+ 737+ 1735+ 1701+ 1614+ 1702+ 1615+ 1656+ 1677+ 1666+ 1301+ 1542+
## [265] 1084+ 1614+ 179 1191 1363+ 1568+ 1569+ 1525+ 1558+ 1447+ 1349+ 1481+
## [277] 1434+ 1420+ 1433+ 1412+ 41 1455+ 1030+ 1418+ 1401+ 1408+ 1234+ 1067+
## [289] 799 1363+ 901+ 1329+ 1320+ 1302+ 877+ 1321+ 533+ 1300+ 1293+ 207
## [301] 1295+ 1271+ 1250+ 1230+ 1216+ 1216+ 1149+ 1153+ 994+ 939+ 839+ 788+
daneKNN$cens <- as.character(daneKNN$cens)
daneKNN$cens <- as.numeric(daneKNN$cens)
pbcKM <- survfit(Surv(time, cens) ~ 1, conf.type="plain", data=daneKNN) #KM
pbcKM1 <- survfit(Surv(time, cens) ~ 1, conf.type="log", data=daneKNN)
pbcNELS=survfit(Surv(time,cens) ~ 1, conf.type ="plain", type="fleming-harrington",data=daneKNN)
splots <- list()
splots[[1]] <- ggsurvplot(
fit = pbcKM,
xlab = "Days",
ylab = "Overall survival probability with Kaplan - Meier estimator and plain confidence interval")
splots[[2]] <- ggsurvplot(
fit = pbcKM1,
xlab = "Days",
ylab = "Overall survival probability with with Kaplan - Meier estimator and log confidence interval",
palette = "blue")
splots[[3]] <- ggsurvplot(
fit = pbcNELS,
xlab = "Days",
ylab = "Overall survival probability with Nelson - Aelen estimator",
palette = "limegreen")
arrange_ggsurvplots(splots, print = TRUE,
ncol = 3, nrow = 1)
summary(pbcKM)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## 312.0000 312.0000 312.0000 125.0000 2973.6112 101.8206 3395.0000 3086.0000
## 0.95UCL
## 3839.0000
summary(pbcKM1)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## 312.0000 312.0000 312.0000 125.0000 2973.6112 101.8206 3395.0000 3086.0000
## 0.95UCL
## 3853.0000
summary(pbcNELS)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## 312.0000 312.0000 312.0000 125.0000 2978.9327 102.2104 3395.0000 3086.0000
## 0.95UCL
## 3839.0000
daneKNN$Age_cat=quantcut(daneKNN$age,q=4)
daneKNN$bili_cat=quantcut(daneKNN$bili,q=4)
daneKNN$chol_cat=quantcut(daneKNN$chol,q=4)
daneKNN$albumin_cat=quantcut(daneKNN$albumin,q=4)
daneKNN$copper_cat=quantcut(daneKNN$copper,q=4)
daneKNN$alk_cat=quantcut(daneKNN$alk.phos,q=4)
daneKNN$ast_cat=quantcut(daneKNN$ast,q=4)
daneKNN$trig_cat=quantcut(daneKNN$trig,q=4)
daneKNN$plat_cat=quantcut(daneKNN$platelet,q=4)
daneKNN$pro_cat=quantcut(daneKNN$protime,q=4)
groupSplots1 <- list()
groupSplots1[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ sex, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("navajowhite3","plum4"))
groupSplots1[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ trt, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("lightseagreen","cornsilk3"))
arrange_ggsurvplots(groupSplots1, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
kmsex <- survfit(Surv(time, cens) ~ sex, data = daneKNN)
summary(kmsex)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## sex=1 36 36 36 22 2479.267 281.2595 2386 1297 NA
## sex=0 276 276 276 103 3048.694 109.5150 3428 3170 NA
kmtrt <- survfit(Surv(time, cens) ~ trt, data = daneKNN)
summary(kmtrt)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## trt=1 158 158 158 65 2949.313 141.7174 3282 2583 NA
## trt=2 154 154 154 60 3002.749 145.7727 3428 3090 NA
groupSplots2 <- list()
groupSplots2[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ ascites, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("darkseagreen4","coral3"))
groupSplots2[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ spiders, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("forestgreen", "chocolate3"))
arrange_ggsurvplots(groupSplots2, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
kmascites <- survfit(Surv(time, cens) ~ ascites, data = daneKNN)
summary(kmascites)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## ascites=0 288 288 288 102 3165.2653 102.3598 3584 3282
## ascites=1 24 24 24 23 856.1944 201.0002 368 223
## 0.95UCL
## ascites=0 NA
## ascites=1 1191
kmspiders <- survfit(Surv(time, cens) ~ spiders, data = daneKNN)
summary(kmspiders)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## spiders=0 222 222 222 73 3287.546 113.0711 3839 3282
## spiders=1 90 90 90 52 2153.799 188.2438 1847 1434
## 0.95UCL
## spiders=0 NA
## spiders=1 3244
groupSplots3 <- list()
groupSplots3[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ hepato, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("palegreen4", "indianred3"))
groupSplots3[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ edema, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("springgreen3","tan3","tomato3"))
arrange_ggsurvplots(groupSplots3, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
kmhepato <- survfit(Surv(time, cens) ~ hepato, data = daneKNN)
summary(kmhepato)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## hepato=0 152 152 152 37 3606.189 129.0008 NA 3584 NA
## hepato=1 160 160 160 88 2372.598 138.0432 2256 1741 3244
kmedema <- survfit(Surv(time, cens) ~ edema, data = daneKNN)
summary(kmedema)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL
## edema=0 263 263 263 89 3231.814 104.2501 3584 3244
## edema=0.5 29 29 29 17 2269.608 350.2181 1576 1012
## edema=1 20 20 20 19 673.550 207.7994 299 179
## 0.95UCL
## edema=0 NA
## edema=0.5 NA
## edema=1 971
ggsurvplot(survfit(Surv(time, cens) ~ stage, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_bw(),
palette = c("palegreen3", "skyblue3","salmon3","brown3"))
kmstage <- survfit(Surv(time, cens) ~ stage, data = daneKNN)
summary(kmstage)$table
## records n.max n.start events rmean se(rmean) median 0.95LCL 0.95UCL
## stage=1 16 16 16 1 4358.727 188.0922 NA NA NA
## stage=2 67 67 67 16 3636.000 179.4216 4079 3839 NA
## stage=3 120 120 120 43 3156.503 153.5608 3428 2847 NA
## stage=4 109 109 109 65 2101.587 173.8259 1682 1165 2796
groupSplots4 <- list()
groupSplots4[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ trig_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"))
groupSplots4[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ Age_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"))
arrange_ggsurvplots(groupSplots4, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
groupSplots5 <- list()
groupSplots5[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ bili_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"))
groupSplots5[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ chol_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"))
arrange_ggsurvplots(groupSplots5, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
groupSplots6 <- list()
groupSplots6[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ albumin_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"))
groupSplots6[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ copper_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"))
arrange_ggsurvplots(groupSplots6, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
groupSplots7 <- list()
groupSplots7[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ alk_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"))
groupSplots7[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ ast_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"))
arrange_ggsurvplots(groupSplots7, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
groupSplots8 <- list()
groupSplots8[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ plat_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"))
groupSplots8[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ pro_cat, data = daneKNN),
pval = TRUE, conf.int = TRUE,
risk.table = TRUE,
risk.table.col = "strata",
linetype = "strata",
surv.median.line = "hv",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"))
arrange_ggsurvplots(groupSplots8, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
___
groupHplots <- list()
groupHplots[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ sex, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_gray(),
palette = c("navajowhite3","plum4"),
fun = "cumhaz")
groupHplots[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ stage, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_minimal(),
palette = c("palegreen3", "skyblue3","salmon3","brown3"),
fun = "cumhaz")
arrange_ggsurvplots(groupHplots, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
groupHplots <- list()
groupHplots[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ hepato, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_gray(),
palette = c("palegreen4", "indianred3"),
fun = "cumhaz")
groupHplots[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ trig_cat, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"),
fun = "cumhaz")
arrange_ggsurvplots(groupHplots, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
test
groupHplots <- list()
groupHplots[[1]] <- ggsurvplot(survfit(Surv(time, cens) ~ copper_cat, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_gray(),
palette = c("wheat3","seagreen3", "slateblue2", "sienna3"),
fun = "cumhaz")
groupHplots[[2]] <- ggsurvplot(survfit(Surv(time, cens) ~ bili_cat, data = daneKNN),
conf.int = TRUE,
risk.table.col = "strata",
ggtheme = theme_minimal(),
palette = c("peachpuff3","lightgreen", "paleturquoise4", "orchid4"),
fun = "cumhaz")
arrange_ggsurvplots(groupHplots, print = TRUE,
ncol = 2, nrow = 1, risk.table.height = 0.2)
___
Cox <- coxph(Surv(time,cens)~age+sex+ascites+hepato+spiders+bili+chol+albumin+copper+alk.phos+ast+trig+platelet+protime, data = daneKNN)
summary(Cox)
## Call:
## coxph(formula = Surv(time, cens) ~ age + sex + ascites + hepato +
## spiders + bili + chol + albumin + copper + alk.phos + ast +
## trig + platelet + protime, data = daneKNN)
##
## n= 312, number of events= 125
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 2.706e-02 1.027e+00 1.033e-02 2.619 0.008815 **
## sex0 -1.019e-01 9.031e-01 2.972e-01 -0.343 0.731787
## ascites1 3.428e-01 1.409e+00 3.411e-01 1.005 0.314994
## hepato1 4.667e-01 1.595e+00 2.257e-01 2.068 0.038625 *
## spiders1 1.863e-01 1.205e+00 2.222e-01 0.838 0.401818
## bili 8.917e-02 1.093e+00 2.174e-02 4.102 4.1e-05 ***
## chol 1.604e-04 1.000e+00 3.945e-04 0.407 0.684291
## albumin -1.047e+00 3.511e-01 2.608e-01 -4.013 6.0e-05 ***
## copper 2.949e-03 1.003e+00 1.150e-03 2.564 0.010349 *
## alk.phos -2.074e-05 1.000e+00 3.810e-05 -0.544 0.586184
## ast 3.255e-03 1.003e+00 1.731e-03 1.880 0.060082 .
## trig -1.136e-03 9.989e-01 1.253e-03 -0.906 0.364814
## platelet -3.698e-04 9.996e-01 1.118e-03 -0.331 0.740890
## protime 3.053e-01 1.357e+00 8.253e-02 3.700 0.000216 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0274 0.9733 1.0068 1.0484
## sex0 0.9031 1.1072 0.5044 1.6172
## ascites1 1.4088 0.7098 0.7219 2.7494
## hepato1 1.5947 0.6271 1.0247 2.4818
## spiders1 1.2048 0.8300 0.7794 1.8623
## bili 1.0933 0.9147 1.0477 1.1409
## chol 1.0002 0.9998 0.9994 1.0009
## albumin 0.3511 2.8482 0.2106 0.5854
## copper 1.0030 0.9971 1.0007 1.0052
## alk.phos 1.0000 1.0000 0.9999 1.0001
## ast 1.0033 0.9968 0.9999 1.0067
## trig 0.9989 1.0011 0.9964 1.0013
## platelet 0.9996 1.0004 0.9974 1.0018
## protime 1.3571 0.7369 1.1544 1.5953
##
## Concordance= 0.851 (se = 0.019 )
## Likelihood ratio test= 187.6 on 14 df, p=<2e-16
## Wald test = 210.4 on 14 df, p=<2e-16
## Score (logrank) test = 317.7 on 14 df, p=<2e-16
W wyniku tej procedury otrzymano model z 7 zmiennymi objasniajacymi: age,hepato,bili,albumin,copper,ast,protime. Według kryterium AIC zmienna ast mimo p-value < 0,06 powinna znalezc sie w modelu. Pozostałe zmienne na poziomie istotnosci 0,05 sa statystycznie rozne od 0 (czyli istotne), ponieważ p-value < 0,05.
stepAIC(Cox,direction="backward")
## Start: AIC=1120.33
## Surv(time, cens) ~ age + sex + ascites + hepato + spiders + bili +
## chol + albumin + copper + alk.phos + ast + trig + platelet +
## protime
##
## Df AIC
## - platelet 1 1118.4
## - sex 1 1118.5
## - chol 1 1118.5
## - alk.phos 1 1118.6
## - spiders 1 1119.0
## - trig 1 1119.2
## - ascites 1 1119.3
## <none> 1120.3
## - ast 1 1121.7
## - hepato 1 1122.7
## - copper 1 1124.4
## - age 1 1125.2
## - protime 1 1129.6
## - bili 1 1132.2
## - albumin 1 1134.0
##
## Step: AIC=1118.44
## Surv(time, cens) ~ age + sex + ascites + hepato + spiders + bili +
## chol + albumin + copper + alk.phos + ast + trig + protime
##
## Df AIC
## - chol 1 1116.5
## - sex 1 1116.6
## - alk.phos 1 1116.8
## - spiders 1 1117.2
## - trig 1 1117.5
## - ascites 1 1117.5
## <none> 1118.4
## - ast 1 1120.6
## - hepato 1 1121.2
## - copper 1 1122.4
## - age 1 1123.5
## - protime 1 1127.9
## - bili 1 1130.4
## - albumin 1 1132.3
##
## Step: AIC=1116.53
## Surv(time, cens) ~ age + sex + ascites + hepato + spiders + bili +
## albumin + copper + alk.phos + ast + trig + protime
##
## Df AIC
## - sex 1 1114.7
## - alk.phos 1 1114.9
## - spiders 1 1115.3
## - trig 1 1115.5
## - ascites 1 1115.6
## <none> 1116.5
## - ast 1 1119.0
## - hepato 1 1119.3
## - copper 1 1120.4
## - age 1 1121.5
## - protime 1 1125.9
## - bili 1 1130.2
## - albumin 1 1130.3
##
## Step: AIC=1114.7
## Surv(time, cens) ~ age + ascites + hepato + spiders + bili +
## albumin + copper + alk.phos + ast + trig + protime
##
## Df AIC
## - alk.phos 1 1113.1
## - spiders 1 1113.4
## - trig 1 1113.6
## - ascites 1 1113.9
## <none> 1114.7
## - ast 1 1117.5
## - hepato 1 1117.6
## - copper 1 1120.2
## - age 1 1121.0
## - protime 1 1124.0
## - bili 1 1128.2
## - albumin 1 1128.4
##
## Step: AIC=1113.14
## Surv(time, cens) ~ age + ascites + hepato + spiders + bili +
## albumin + copper + ast + trig + protime
##
## Df AIC
## - trig 1 1112.1
## - spiders 1 1112.2
## - ascites 1 1112.7
## <none> 1113.1
## - hepato 1 1116.0
## - ast 1 1116.1
## - copper 1 1118.2
## - age 1 1120.6
## - protime 1 1122.1
## - albumin 1 1126.4
## - bili 1 1126.5
##
## Step: AIC=1112.11
## Surv(time, cens) ~ age + ascites + hepato + spiders + bili +
## albumin + copper + ast + protime
##
## Df AIC
## - ascites 1 1111.0
## - spiders 1 1111.2
## <none> 1112.1
## - hepato 1 1114.9
## - ast 1 1115.7
## - copper 1 1117.2
## - age 1 1120.9
## - protime 1 1121.9
## - bili 1 1125.2
## - albumin 1 1125.2
##
## Step: AIC=1111.05
## Surv(time, cens) ~ age + hepato + spiders + bili + albumin +
## copper + ast + protime
##
## Df AIC
## - spiders 1 1110.4
## <none> 1111.0
## - hepato 1 1113.6
## - ast 1 1114.2
## - copper 1 1118.5
## - protime 1 1121.7
## - age 1 1121.9
## - bili 1 1126.2
## - albumin 1 1128.5
##
## Step: AIC=1110.39
## Surv(time, cens) ~ age + hepato + bili + albumin + copper + ast +
## protime
##
## Df AIC
## <none> 1110.4
## - ast 1 1113.4
## - hepato 1 1114.0
## - copper 1 1119.1
## - age 1 1120.7
## - protime 1 1122.4
## - bili 1 1127.5
## - albumin 1 1129.1
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + bili + albumin +
## copper + ast + protime, data = daneKNN)
##
## coef exp(coef) se(coef) z p
## age 0.0326504 1.0331893 0.0092752 3.520 0.000431
## hepato1 0.5080490 1.6620454 0.2174273 2.337 0.019458
## bili 0.0854208 1.0891753 0.0177598 4.810 1.51e-06
## albumin -1.1054157 0.3310732 0.2348614 -4.707 2.52e-06
## copper 0.0032776 1.0032830 0.0009424 3.478 0.000505
## ast 0.0037099 1.0037168 0.0015580 2.381 0.017255
## protime 0.3220291 1.3799250 0.0778588 4.136 3.53e-05
##
## Likelihood ratio test=183.5 on 7 df, p=< 2.2e-16
## n= 312, number of events= 125
Cox1 <- coxph(Surv(time,cens)~age+hepato+bili+albumin+copper+ast+protime, data = daneKNN)
summary(Cox1)
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + bili + albumin +
## copper + ast + protime, data = daneKNN)
##
## n= 312, number of events= 125
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.0326504 1.0331893 0.0092752 3.520 0.000431 ***
## hepato1 0.5080490 1.6620454 0.2174273 2.337 0.019458 *
## bili 0.0854208 1.0891753 0.0177598 4.810 1.51e-06 ***
## albumin -1.1054157 0.3310732 0.2348614 -4.707 2.52e-06 ***
## copper 0.0032776 1.0032830 0.0009424 3.478 0.000505 ***
## ast 0.0037099 1.0037168 0.0015580 2.381 0.017255 *
## protime 0.3220291 1.3799250 0.0778588 4.136 3.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0332 0.9679 1.0146 1.0521
## hepato1 1.6620 0.6017 1.0853 2.5452
## bili 1.0892 0.9181 1.0519 1.1278
## albumin 0.3311 3.0205 0.2089 0.5246
## copper 1.0033 0.9967 1.0014 1.0051
## ast 1.0037 0.9963 1.0007 1.0068
## protime 1.3799 0.7247 1.1846 1.6074
##
## Concordance= 0.845 (se = 0.019 )
## Likelihood ratio test= 183.5 on 7 df, p=<2e-16
## Wald test = 199.3 on 7 df, p=<2e-16
## Score (logrank) test = 276.9 on 7 df, p=<2e-16
Jeżeli p > 0,05 to zostaly spełnione zalożenia proporcjalosci. Dla zmiennych: bili oraz protime te założenia nie zostały spełnione, więc należy usunąć te zmienne z modelu.
Prop <- cox.zph(Cox1, transform = 'km')
print(Prop)
## chisq df p
## age 0.3590 1 0.5491
## hepato 0.8706 1 0.3508
## bili 7.3198 1 0.0068
## albumin 1.1252 1 0.2888
## copper 0.0145 1 0.9043
## ast 1.9955 1 0.1578
## protime 5.2823 1 0.0215
## GLOBAL 16.9008 7 0.0180
plot(Prop)
### Estymacja modelu z wykorzystaniem tylko istotnych zmiennych
Cox2 <- coxph(Surv(time,cens)~age+hepato+albumin+copper+ast, data = daneKNN)
summary(Cox2)
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + albumin + copper +
## ast, data = daneKNN)
##
## n= 312, number of events= 125
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.0330674 1.0336202 0.0088058 3.755 0.000173 ***
## hepato1 0.7046633 2.0231653 0.2112452 3.336 0.000851 ***
## albumin -1.2051939 0.2996339 0.2336910 -5.157 2.51e-07 ***
## copper 0.0046068 1.0046175 0.0008553 5.386 7.19e-08 ***
## ast 0.0047647 1.0047761 0.0012846 3.709 0.000208 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0336 0.9675 1.0159 1.0516
## hepato1 2.0232 0.4943 1.3373 3.0609
## albumin 0.2996 3.3374 0.1895 0.4737
## copper 1.0046 0.9954 1.0029 1.0063
## ast 1.0048 0.9952 1.0022 1.0073
##
## Concordance= 0.824 (se = 0.02 )
## Likelihood ratio test= 140.4 on 5 df, p=<2e-16
## Wald test = 162.9 on 5 df, p=<2e-16
## Score (logrank) test = 174.9 on 5 df, p=<2e-16
reszty <- resid(Cox1, type="scaledsch")
Time <- as.numeric(rownames(reszty))
zmienne <- names(daneKNN[,c(5,8,11,13,14,16,19)])
par(mfrow = c(3,3))
for (i in 1:7) {
plot(log(Time), reszty[,i], xlab="ln(Czas)", main=zmienne[i],
ylab="Skalowane reszty Schoenfelda", pch=20, cex=0.7)
lines(smooth.spline(log(Time), reszty[,i] ), lwd=3 )
}
### Identyfikacja jednostek odstających
W zbiorze znajduje się jedna jednostką odstająca.
deviance <- residuals(Cox2,type="deviance")
s <- Cox2$linear.predictors
plot(s,deviance,xlab="Liniowy predyktor",ylab="Reszty odchylen",cex=0.5, pch=20)
abline(h=c(3,-3),lty=3)
daneKNN$deviance <- deviance
c1 <- which(daneKNN$deviance < c(-3))
dfb <- residuals(Cox2,type="dfbeta")
n <- dim(dfb)[1]
obs.nr <- c(1:n)
par(mfrow = c(2,3))
for (j in 1:5) {
plot(obs.nr,dfb[,j],xlab="Numer jednostki",ylab="Przyrost oceny parametru",
main=zmienne[j])
}
a1 <- which(abs(dfb[,1])>(0.004)) #usunac te jednostki
a2 <- which(abs(dfb[,2])>(0.06))
a3 <- which(abs(dfb[,3])>(0.1))
a4 <- which(abs(dfb[,4])>(0.0004))
a5 <- which(abs(dfb[,5])>(0.001))
c <- sort(unique(c(a1,a2,a3,a4,a5,c1)))
daneKNN_1 <- daneKNN[-c,] #zredukowany zbior
Cox3 <- coxph(Surv(time,cens)~age+hepato+albumin+copper+ast, data = daneKNN_1)
summary(Cox3)
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + albumin + copper +
## ast, data = daneKNN_1)
##
## n= 307, number of events= 124
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.0486020 1.0498024 0.0098801 4.919 8.69e-07 ***
## hepato1 0.6956171 2.0049458 0.2086548 3.334 0.000857 ***
## albumin -1.4274764 0.2399136 0.2545430 -5.608 2.05e-08 ***
## copper 0.0064491 1.0064699 0.0009382 6.874 6.25e-12 ***
## ast 0.0082022 1.0082359 0.0016792 4.885 1.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0498 0.9526 1.0297 1.0703
## hepato1 2.0049 0.4988 1.3320 3.0179
## albumin 0.2399 4.1682 0.1457 0.3951
## copper 1.0065 0.9936 1.0046 1.0083
## ast 1.0082 0.9918 1.0049 1.0116
##
## Concordance= 0.836 (se = 0.019 )
## Likelihood ratio test= 180.3 on 5 df, p=<2e-16
## Wald test = 187.2 on 5 df, p=<2e-16
## Score (logrank) test = 217.2 on 5 df, p=<2e-16
stepAIC(Cox3,direction="backward")
## Start: AIC=1093.72
## Surv(time, cens) ~ age + hepato + albumin + copper + ast
##
## Df AIC
## <none> 1093.7
## - hepato 1 1103.4
## - ast 1 1113.2
## - age 1 1116.1
## - albumin 1 1122.8
## - copper 1 1130.0
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + albumin + copper +
## ast, data = daneKNN_1)
##
## coef exp(coef) se(coef) z p
## age 0.0486020 1.0498024 0.0098801 4.919 8.69e-07
## hepato1 0.6956171 2.0049458 0.2086548 3.334 0.000857
## albumin -1.4274764 0.2399136 0.2545430 -5.608 2.05e-08
## copper 0.0064491 1.0064699 0.0009382 6.874 6.25e-12
## ast 0.0082022 1.0082359 0.0016792 4.885 1.04e-06
##
## Likelihood ratio test=180.3 on 5 df, p=< 2.2e-16
## n= 307, number of events= 124
### Reszty martyngałowe dla zmiennej age
zmn1 <- daneKNN_1$age
lab1 <- "Age (years)"
reszty1 <- resid(coxph(Surv(time,cens)~1,data=daneKNN_1),type="martingale")
plot(zmn1, reszty1, xlab=lab1,ylab="Martingale residuals",cex=0.6)
lines(lowess(zmn1, reszty1,delta=1),lwd=2)
### Reszty martyngałowe dla zmiennej albumin
zmn2 <- daneKNN_1$albumin
lab2 <- "albumin"
reszty2 <- resid(coxph(Surv(time,cens)~1,data=daneKNN_1),type="martingale")
plot(zmn2, reszty2, xlab=lab2,ylab="Reszty martynga?owe",cex=0.6)
lines(lowess(zmn2, reszty2,delta=1),lwd=2)
zmn3 <- daneKNN_1$copper
lab3 <- "copper"
reszty3 <- resid(coxph(Surv(time,cens)~1,data=daneKNN_1),type="martingale")
plot(zmn3, reszty3, xlab=lab3,ylab="Reszty martynga?owe",cex=0.6)
lines(lowess(zmn3, reszty3,delta=1),lwd=2)
zmn4 <- daneKNN_1$ast
lab4 <- "ast"
reszty4 <- resid(coxph(Surv(time,cens)~1,data=daneKNN_1),type="martingale")
plot(zmn4, reszty4, xlab=lab4,ylab="Reszty martynga?owe",cex=0.6)
lines(lowess(zmn4, reszty4,delta=1),lwd=2)
### Transformacja zmiennej copper
Cox4 <- coxph(Surv(time, cens)~zmn3,data=daneKNN_1)
A1 <- round(AIC(Cox3),2) #akaike z modelu coxa
A1
## [1] 1093.72
cutpoint <- cutp(Cox4)$zmn3 ## optymalny punkt odciecia
c <- (zmn3>=cutpoint$zmn3[1])*1+0 #jezeli zmienna jest >= punkotwi odciecia to przypisuje 1 w przeciwnym wypadku 0
Cox5 <- coxph(Surv(time, cens)~c,data=daneKNN_1)
A2 <- round(AIC(Cox5),2)
A2
## [1] 1201.05
m3 <- mfp(Surv(time,cens)~ fp(zmn3, df = 4, select = 0.05),family=cox, data=daneKNN_1) ## wielomiany ulamkowe
m3
## Call:
## mfp(formula = Surv(time, cens) ~ fp(zmn3, df = 4, select = 0.05),
## data = daneKNN_1, family = cox)
##
##
## Deviance table:
## Resid. Dev
## Null model 1263.992
## Linear model 1191.1
## Final model 1179.526
##
## Fractional polynomials:
## df.initial select alpha df.final power1 power2
## zmn3 4 0.05 0.05 2 0.5 .
##
##
## Transformations of covariates:
## formula
## zmn3 I((zmn3/100)^0.5)
##
## coef exp(coef) se(coef) z p
## zmn3.1 0.22 1.246 0.02197 10.01 0
##
## Likelihood ratio test=84.47 on 1 df, p=0 n= 307
A3 <- round(AIC(m3))
A3
## [1] 1182
int <- quantile(na.omit(zmn3),probs=c(0.05,0.275,.5,.725,.95))
Cox6 <- coxph(Surv(time,cens)~ns(zmn3,knots=int),data=daneKNN_1)
pred <- predict(Cox6,type="terms",se.fit=TRUE, terms=1)
plot(na.omit(zmn3),exp(pred$fit),type="n",xlab=lab3,ylim=c(0,2.5),
ylab="Hazard względny")
lines(smooth.spline(na.omit(zmn3),exp(pred$fit+1.96*pred$se.fit)),lty=2)
lines(smooth.spline(na.omit(zmn3),exp(pred$fit-1.96*pred$se.fit)),lty=2)
lines(smooth.spline(na.omit(zmn3),exp(pred$fit)),lty=1)
abline(h=1,lty=3)
legend('topright',2,c("splajn","95% przedzial ufnosci"), lty=1:2, box.lty=0)
A4 <- round(AIC(Cox6),2)
A4
## [1] 1187.47
Mimo wszystko najlepszym modelem jest model bez transformacji zmiennej copper, czyli:
rbind(A1,A2,A3,A4)
## [,1]
## A1 1093.72
## A2 1201.05
## A3 1182.00
## A4 1187.47
Cox3 <- coxph(Surv(time,cens)~age+hepato+albumin+copper+ast, data = daneKNN_1)
summary(Cox3)
## Call:
## coxph(formula = Surv(time, cens) ~ age + hepato + albumin + copper +
## ast, data = daneKNN_1)
##
## n= 307, number of events= 124
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.0486020 1.0498024 0.0098801 4.919 8.69e-07 ***
## hepato1 0.6956171 2.0049458 0.2086548 3.334 0.000857 ***
## albumin -1.4274764 0.2399136 0.2545430 -5.608 2.05e-08 ***
## copper 0.0064491 1.0064699 0.0009382 6.874 6.25e-12 ***
## ast 0.0082022 1.0082359 0.0016792 4.885 1.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0498 0.9526 1.0297 1.0703
## hepato1 2.0049 0.4988 1.3320 3.0179
## albumin 0.2399 4.1682 0.1457 0.3951
## copper 1.0065 0.9936 1.0046 1.0083
## ast 1.0082 0.9918 1.0049 1.0116
##
## Concordance= 0.836 (se = 0.019 )
## Likelihood ratio test= 180.3 on 5 df, p=<2e-16
## Wald test = 187.2 on 5 df, p=<2e-16
## Score (logrank) test = 217.2 on 5 df, p=<2e-16
r2_1 <- coxr2(Cox1) #model otrzymany metoda krokowa przed sprawdzeniem zalozen proporcjonalnosci
r2_2 <- coxr2(Cox2) #model po odrzuceniu zmiennych niespelniajacych zalozen proporcjonalnosci
r2_3 <- coxr2(Cox3) #model po odrzuceniu jednostek odstajacych i wplywowych (uznany za najlepszy)
round(rbind(r2_1$rsq, r2_2$rsq, r2_3$rsq),2)
## rsq
## [1,] 0.77
## [2,] 0.67
## [3,] 0.77